{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Private Predictions with TFE Keras"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Step 1: Public Training with Differential Privacy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To learn about the overal objectives of this tutorial, you can first go through the notebook `a - Public Training without DP`. It's exactly the same objectives except that we will train the model with differential privacy.\n",
    "\n",
    "By training your model with differential privacy, you can ensure that the model is not memorizing sensitive informations about the training set. If the model is not trained with differential privacy, attackers could reveal some private imformations by just querring the deployed model. Two common attacks are [membership inference](https://www.cs.cornell.edu/~shmat/shmat_oak17.pdf) and [model inversion](https://www.cs.cmu.edu/~mfredrik/papers/fjr2015ccs.pdf).\n",
    "\n",
    "To learn more about [TensorFlow Privacy](https://github.com/tensorflow/privacy) you can read this [excellent blog post](http://www.cleverhans.io/privacy/2019/03/26/machine-learning-with-differential-privacy-in-tensorflow.html). Before running this notebook, please install [TensorFlow Privacy](https://github.com/tensorflow/privacy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-11-01 09:39:00.543505: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2022-11-01 09:39:00.547402: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
      "2022-11-01 09:39:00.547415: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
     ]
    }
   ],
   "source": [
    "\"\"\"Training a CNN on MNIST with Keras and the DP SGD optimizer.\n",
    "source: https://github.com/tensorflow/privacy/blob/master/tutorials/mnist_dpsgd_tutorial_keras.py\n",
    "\"\"\"\n",
    "import dp_accounting\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "\n",
    "from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/5\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-11-01 09:39:12.515235: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n",
      "2022-11-01 09:39:12.515263: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)\n",
      "2022-11-01 09:39:12.515278: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (mpc-empty011122134151.ea134): /proc/driver/nvidia/version does not exist\n",
      "2022-11-01 09:39:12.515506: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "240/240 [==============================] - 269s 1s/step - loss: 1.4785 - accuracy: 0.5530 - val_loss: 0.7667 - val_accuracy: 0.7678\n",
      "Epoch 2/5\n",
      "240/240 [==============================] - 268s 1s/step - loss: 0.6324 - accuracy: 0.8097 - val_loss: 0.5209 - val_accuracy: 0.8494\n",
      "Epoch 3/5\n",
      "240/240 [==============================] - 268s 1s/step - loss: 0.5205 - accuracy: 0.8553 - val_loss: 0.4823 - val_accuracy: 0.8745\n",
      "Epoch 4/5\n",
      "240/240 [==============================] - 268s 1s/step - loss: 0.4973 - accuracy: 0.8748 - val_loss: 0.4694 - val_accuracy: 0.8849\n",
      "Epoch 5/5\n",
      "240/240 [==============================] - 268s 1s/step - loss: 0.4813 - accuracy: 0.8861 - val_loss: 0.4556 - val_accuracy: 0.8945\n",
      "For delta=1e-5, the current epsilon is: 5637.18\n"
     ]
    }
   ],
   "source": [
    "dpsgd = True            # If True, train with DP-SGD\n",
    "learning_rate = 0.15    # Learning rate for training\n",
    "noise_multiplier = 0.1  # Ratio of the standard deviation to the clipping norm\n",
    "l2_norm_clip = 1.0      # Clipping norm\n",
    "batch_size = 250        # Batch size\n",
    "epochs = 5              # Number of epochs\n",
    "microbatches = 250      # Number of microbatches\n",
    "\n",
    "\n",
    "def compute_epsilon(steps):\n",
    "    \"\"\"Computes epsilon value for given hyperparameters.\"\"\"\n",
    "    if noise_multiplier == 0.0:\n",
    "        return float('inf')\n",
    "    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))\n",
    "    accountant = dp_accounting.rdp.RdpAccountant(orders)\n",
    "\n",
    "    sampling_probability = batch_size / 60000\n",
    "    event = dp_accounting.SelfComposedDpEvent(\n",
    "        dp_accounting.PoissonSampledDpEvent(\n",
    "            sampling_probability,\n",
    "            dp_accounting.GaussianDpEvent(noise_multiplier)\n",
    "        ), \n",
    "        steps\n",
    "    )\n",
    "    accountant.compose(event)\n",
    "\n",
    "    # Delta is set to 1e-5 because MNIST has 60000 training points.\n",
    "    return accountant.get_epsilon(target_delta=1e-5)\n",
    "\n",
    "\n",
    "def load_mnist():\n",
    "    \"\"\"Loads MNIST and preprocesses to combine training and validation data.\"\"\"\n",
    "    train, test = tf.keras.datasets.mnist.load_data()\n",
    "    train_data, train_labels = train\n",
    "    test_data, test_labels = test\n",
    "\n",
    "    train_data = np.array(train_data, dtype=np.float32) / 255\n",
    "    test_data = np.array(test_data, dtype=np.float32) / 255\n",
    "\n",
    "    train_data = train_data.reshape((train_data.shape[0], 28, 28, 1))\n",
    "    test_data = test_data.reshape((test_data.shape[0], 28, 28, 1))\n",
    "\n",
    "    train_labels = np.array(train_labels, dtype=np.int32)\n",
    "    test_labels = np.array(test_labels, dtype=np.int32)\n",
    "\n",
    "    train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=10)\n",
    "    test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)\n",
    "\n",
    "    assert train_data.min() == 0.\n",
    "    assert train_data.max() == 1.\n",
    "    assert test_data.min() == 0.\n",
    "    assert test_data.max() == 1.\n",
    "\n",
    "    return train_data, train_labels, test_data, test_labels\n",
    "\n",
    "if dpsgd and batch_size % microbatches != 0:\n",
    "    raise ValueError('Number of microbatches should divide evenly batch_size')\n",
    "\n",
    "# Load training and test data.\n",
    "train_data, train_labels, test_data, test_labels = load_mnist()\n",
    "\n",
    "# Define a sequential Keras model\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Conv2D(\n",
    "        16,\n",
    "        8,\n",
    "        strides=2,\n",
    "        padding='same',\n",
    "        activation='relu',\n",
    "        input_shape=(28, 28, 1)),\n",
    "    tf.keras.layers.MaxPool2D(2, 1),\n",
    "    tf.keras.layers.Conv2D(\n",
    "        32, 4, strides=2, padding='valid', activation='relu'),\n",
    "    tf.keras.layers.MaxPool2D(2, 1),\n",
    "    tf.keras.layers.Flatten(),\n",
    "    tf.keras.layers.Dense(32, activation='relu'),\n",
    "    tf.keras.layers.Dense(10)\n",
    "])\n",
    "\n",
    "if dpsgd:\n",
    "    optimizer = DPKerasSGDOptimizer(\n",
    "        l2_norm_clip=l2_norm_clip,\n",
    "        noise_multiplier=noise_multiplier,\n",
    "        num_microbatches=microbatches,\n",
    "        learning_rate=learning_rate)\n",
    "        # Compute vector of per-example loss rather than its mean over a minibatch.\n",
    "    loss = tf.keras.losses.CategoricalCrossentropy(\n",
    "        from_logits=True, reduction=tf.losses.Reduction.NONE)\n",
    "else:\n",
    "    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)\n",
    "    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n",
    "\n",
    "# Compile model with Keras\n",
    "model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])\n",
    "\n",
    "# Train model with Keras\n",
    "model.fit(\n",
    "    train_data,\n",
    "    train_labels,\n",
    "    epochs=epochs,\n",
    "    validation_data=(test_data, test_labels),\n",
    "    batch_size=batch_size\n",
    ")\n",
    "\n",
    "# Compute the privacy budget expended.\n",
    "if dpsgd:\n",
    "    eps = compute_epsilon(epochs * 60000 // batch_size)\n",
    "    print('For delta=1e-5, the current epsilon is: %.2f' % eps)\n",
    "else:\n",
    "    print('Trained with vanilla non-private SGD optimizer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save('short-dnn.h5')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.13 ('tfe2')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "2b3cf1a821f25a2bafd97d4b4f3a77d0e1f42d8d78c0e512d3a72b703c70416e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
